import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
# Load the input table from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="density_clus2.csv")
# Preview the first five rows (rendered as the cell output in a notebook).
df.head(n=5)
| | G1_kVA/km | G1_Customers/km | G2_kVA/km | G2_Customers/km | G3_kVA/km | G3_Customers/km | G4_kVA/km | G4_Customers/km |
|---|---|---|---|---|---|---|---|---|
| 0 | 4313.976565 | 33.399235 | 1862.268741 | 374.596046 | 1781.155627 | 79.110583 | 1653.390696 | 267.928974 |
| 1 | 4999.080363 | 6.422016 | 1250.887617 | 251.552125 | 1113.283437 | 216.461350 | 6861.336361 | 3.108008 |
| 2 | 4427.403090 | 23.219377 | 1747.982603 | 403.100475 | 1068.699000 | 183.344920 | 3823.443228 | 6.319578 |
| 3 | 5734.095832 | 4.980156 | 1586.394505 | 87.832745 | 737.681555 | 110.138170 | 931.246525 | 88.690145 |
| 4 | 7472.555055 | 4.415658 | 4458.329485 | 17.833318 | 672.131687 | 269.142386 | 211.275854 | 1.207291 |
# Reshape the wide table (one `G{i}_kVA/km` / `G{i}_Customers/km` column pair
# per group, i = 1..4) into a long table with a single pair of measurement
# columns plus a `group` label.
group_frames = []
for i in range(1, 5):
    # .copy() detaches the selection from `df`, so the rename and the `group`
    # assignment below operate on an independent frame instead of a slice.
    # This is what silences pandas' SettingWithCopyWarning.
    df_temp = df[[f'G{i}_kVA/km', f'G{i}_Customers/km']].copy()
    df_temp.columns = ['kVA/km', 'Customers/km']
    df_temp['group'] = i
    # Discard rows where a value is missing for this group.
    df_temp = df_temp.dropna()
    group_frames.append(df_temp)

# Concatenate once instead of growing the frame inside the loop (and without
# seeding the concat with an empty DataFrame). Row labels from `df` are kept,
# so the same label can appear once per group in `df_sep`.
df_sep = pd.concat(group_frames)
/tmp/ipykernel_1677/2749130029.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df_temp['group'] = i /tmp/ipykernel_1677/2749130029.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df_temp['group'] = i /tmp/ipykernel_1677/2749130029.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df_temp['group'] = i /tmp/ipykernel_1677/2749130029.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy df_temp['group'] = i
# Add natural-log versions of both measurement columns, named `<column>_log`.
# NOTE(review): non-positive inputs would yield -inf/NaN here -- confirm the
# data is strictly positive before clustering on these columns.
for col in ('kVA/km', 'Customers/km'):
    df_sep[f'{col}_log'] = np.log(df_sep[col])
from matplotlib import animation
from IPython.display import display,HTML
from sklearn.cluster import KMeans
from functools import partial
def update_group(k, data, feature, group):
    """Redraw the three panels for a K-Means clustering with ``k`` clusters.

    Intended as the per-frame callback of ``animation.FuncAnimation``: the
    frame value arrives as ``k`` and the remaining arguments are bound by the
    caller (e.g. with ``functools.partial``).

    The function draws on the module-level ``ax`` list (three axes) and sets
    the title of the module-level ``fig``; both must exist when it is called.

    Parameters
    ----------
    k : int
        Number of clusters to fit; also the number of palette colours used.
    data : pandas.DataFrame
        Must contain the columns listed in ``feature`` as well as
        ``'kVA/km'`` and ``'Customers/km'``, which are always the plotted
        axes regardless of which columns were used for clustering.
    feature : list of str
        Column names used as the clustering inputs.
    group : object
        Label interpolated into the figure title.
    """
    cluster_palette = sns.color_palette()[:k]

    # fit_predict fits the model and returns the training labels in one call,
    # avoiding a second assignment pass over the same data.
    clus = KMeans(n_clusters=k).fit_predict(data[feature].values)

    # Wipe the previous frame before drawing the new one.
    for a in ax:
        a.clear()

    # ax[1]: scatter of the raw measurements coloured by cluster label.
    sns.scatterplot(data=data, x='kVA/km', y='Customers/km', hue=clus,
                    palette=cluster_palette, linewidth=0, ax=ax[1])
    # ax[0]: per-cluster density of kVA/km along the x direction.
    sns.kdeplot(data=data, x='kVA/km', hue=clus, palette=cluster_palette,
                common_norm=False, ax=ax[0], warn_singular=False)
    # ax[2]: per-cluster density of Customers/km along the y direction.
    sns.kdeplot(data=data, y='Customers/km', hue=clus, palette=cluster_palette,
                common_norm=False, ax=ax[2], warn_singular=False)

    # Only the scatter panel keeps its legend. get_legend() returns None when
    # no legend was created, so guard before removing.
    for density_ax in (ax[0], ax[2]):
        legend = density_ax.get_legend()
        if legend is not None:
            legend.remove()

    fig.suptitle(f'Group {group} KMean k = {k}')
#plt.close()
#update_group(4,data=df_g[['kVA/km','Customers/km']])
#%config InlineBackend.figure_formats = ['svg']
# Set the figure resolution to 150 dpi for all figures created from here on.
plt.rcParams['figure.dpi'] = 150

# Restrict the long table to the rows labelled group 1.
group = 1
df_g = df_sep.loc[df_sep['group'].eq(group)]

# 4x4 grid: a top strip, a large main panel below it, and a right-hand strip
# beside the main panel. `update_group` reads `fig` and `ax` as globals.
fig = plt.figure(figsize=(10, 10))
gs = fig.add_gridspec(4, 4)
ax = [
    fig.add_subplot(gs[0, :-1]),
    fig.add_subplot(gs[1:, :-1]),
    fig.add_subplot(gs[1:, -1]),
]

# One frame per k = 1..9, clustering on the raw measurement columns.
frame_callback = partial(
    update_group,
    data=df_g,
    feature=['kVA/km', 'Customers/km'],
    group=group,
)
anim_gauss = animation.FuncAnimation(
    fig,
    frame_callback,
    frames=range(1, 10),
    interval=500,
    repeat=True,
)
# Close the figure so only the rendered animation is shown, not a static plot.
plt.close()
HTML(anim_gauss.to_jshtml())
# Fresh figure with the same three-panel layout (top strip, main panel,
# right-hand strip); `update_group` reads `fig` and `ax` as globals.
fig = plt.figure(figsize=(10, 10))
gs = fig.add_gridspec(4, 4)
ax = [
    fig.add_subplot(gs[0, :-1]),
    fig.add_subplot(gs[1:, :-1]),
    fig.add_subplot(gs[1:, -1]),
]

# One frame per k = 1..9, clustering on the log-transformed columns. Reuses
# `df_g` and `group` as they were last assigned earlier in the file.
frame_callback = partial(
    update_group,
    data=df_g,
    feature=['kVA/km_log', 'Customers/km_log'],
    group=group,
)
anim_gauss = animation.FuncAnimation(
    fig,
    frame_callback,
    frames=range(1, 10),
    interval=500,
    repeat=True,
)
# Close the figure so only the rendered animation is shown, not a static plot.
plt.close()
HTML(anim_gauss.to_jshtml())
#group = 1
# Pool the rows labelled group 2 or 3 and renumber them 0..n-1, discarding
# the row labels carried over from the per-group concatenation.
df_g = df_sep.loc[df_sep['group'].isin([2, 3])].reset_index(drop=True)

# Three-panel layout (top strip, main panel, right-hand strip);
# `update_group` reads `fig` and `ax` as globals.
fig = plt.figure(figsize=(10, 10))
gs = fig.add_gridspec(4, 4)
ax = [
    fig.add_subplot(gs[0, :-1]),
    fig.add_subplot(gs[1:, :-1]),
    fig.add_subplot(gs[1:, -1]),
]

# One frame per k = 1..9, clustering on the raw measurement columns.
frame_callback = partial(
    update_group,
    data=df_g,
    feature=['kVA/km', 'Customers/km'],
    group='2,3',
)
anim_gauss = animation.FuncAnimation(
    fig,
    frame_callback,
    frames=range(1, 10),
    interval=500,
    repeat=True,
)
# Close the figure so only the rendered animation is shown, not a static plot.
plt.close()
HTML(anim_gauss.to_jshtml())
# Fresh figure with the three-panel layout (top strip, main panel,
# right-hand strip); `update_group` reads `fig` and `ax` as globals.
fig = plt.figure(figsize=(10, 10))
gs = fig.add_gridspec(4, 4)
ax = [
    fig.add_subplot(gs[0, :-1]),
    fig.add_subplot(gs[1:, :-1]),
    fig.add_subplot(gs[1:, -1]),
]

# One frame per k = 1..9, clustering on the log-transformed columns of the
# `df_g` frame assigned earlier in the file; the title label is '2,3'.
frame_callback = partial(
    update_group,
    data=df_g,
    feature=['kVA/km_log', 'Customers/km_log'],
    group='2,3',
)
anim_gauss = animation.FuncAnimation(
    fig,
    frame_callback,
    frames=range(1, 10),
    interval=500,
    repeat=True,
)
# Close the figure so only the rendered animation is shown, not a static plot.
plt.close()
HTML(anim_gauss.to_jshtml())
# Restrict the long table to the rows labelled group 4.
group = 4
df_g = df_sep.loc[df_sep['group'].eq(group)]

# Three-panel layout (top strip, main panel, right-hand strip);
# `update_group` reads `fig` and `ax` as globals.
fig = plt.figure(figsize=(10, 10))
gs = fig.add_gridspec(4, 4)
ax = [
    fig.add_subplot(gs[0, :-1]),
    fig.add_subplot(gs[1:, :-1]),
    fig.add_subplot(gs[1:, -1]),
]

# One frame per k = 1..9, clustering on the raw measurement columns.
frame_callback = partial(
    update_group,
    data=df_g,
    feature=['kVA/km', 'Customers/km'],
    group=group,
)
anim_gauss = animation.FuncAnimation(
    fig,
    frame_callback,
    frames=range(1, 10),
    interval=500,
    repeat=True,
)
# Close the figure so only the rendered animation is shown, not a static plot.
plt.close()
HTML(anim_gauss.to_jshtml())
# Fresh figure with the three-panel layout (top strip, main panel,
# right-hand strip); `update_group` reads `fig` and `ax` as globals.
fig = plt.figure(figsize=(10, 10))
gs = fig.add_gridspec(4, 4)
ax = [
    fig.add_subplot(gs[0, :-1]),
    fig.add_subplot(gs[1:, :-1]),
    fig.add_subplot(gs[1:, -1]),
]

# One frame per k = 1..9, clustering on the log-transformed columns. Reuses
# `df_g` and `group` as they were last assigned earlier in the file.
frame_callback = partial(
    update_group,
    data=df_g,
    feature=['kVA/km_log', 'Customers/km_log'],
    group=group,
)
anim_gauss = animation.FuncAnimation(
    fig,
    frame_callback,
    frames=range(1, 10),
    interval=500,
    repeat=True,
)
# Close the figure so only the rendered animation is shown, not a static plot.
plt.close()
HTML(anim_gauss.to_jshtml())